library(tidyverse)
library(DT)
# Import the Lab 9 state-level names file. The path is assembled with
# here::here() and the resulting string is handed to read_csv().
names <- here::here(
  "supporting_artifacts",
  "learning_targets",
  "Lab 9",
  "StateNames_A.csv"
) |>
  read_csv()
Challenge 9
Data Import and Packages
# Show the imported data as an interactive DT table.
names |>
  datatable()
Warning in instance$preRenderHook(instance): It seems your data is too big
for client-side DataTables. You may consider server-side processing: https://
rstudio.github.io/DT/server.html
Part 1: Summarizing and Visualizing Allisons
Question 1
# Later steps refer to the column as `Sex`; `Gender` is kept alongside it.
names <- names |>
  mutate(Sex = Gender)

# Total number of babies named "Allison" per state, one column per sex.
# `values_fill = 0` turns state/sex combinations that never occur into 0
# while keeping the real totals of the combinations that do occur.
allisonname <- names |>
  filter(Name == "Allison") |>
  group_by(State, Sex) |>
  summarize(Count = sum(Count), .groups = "drop") |>
  pivot_wider(names_from = Sex, values_from = Count, values_fill = 0)

# If the data holds no male Allisons at all, pivot_wider() creates no `M`
# column, so add an all-zero one to keep the table three columns wide.
if (!"M" %in% colnames(allisonname)) {
  allisonname <- mutate(allisonname, M = 0)
}

# Columns are selected by name so each header is tied to the column it
# describes rather than to a position.
knitr::kable(
  select(allisonname, all_of(c("State", "F", "M"))),
  format = "html",
  col.names = c(
    "State",
    "Total Sum of Female",
    "Total Sum of Male"
  )
)
| State | Total Sum of Female | Total Sum of Male |
|---|---|---|
| AK | 232 | 0 |
| AL | 1535 | 0 |
| AR | 1198 | 0 |
| AZ | 1880 | 0 |
| CA | 12413 | 0 |
| CO | 1594 | 0 |
| CT | 1099 | 0 |
| DC | 321 | 0 |
| DE | 294 | 0 |
| FL | 4455 | 0 |
| GA | 3257 | 0 |
| HI | 183 | 0 |
| IA | 1477 | 0 |
| ID | 451 | 0 |
| IL | 5110 | 0 |
| IN | 3067 | 0 |
| KS | 1283 | 0 |
| KY | 1905 | 0 |
| LA | 1209 | 0 |
| MA | 2218 | 0 |
| MD | 2229 | 0 |
| ME | 340 | 0 |
| MI | 4014 | 0 |
| MN | 2374 | 0 |
| MO | 2882 | 0 |
| MS | 817 | 0 |
| MT | 226 | 0 |
| NC | 3435 | 0 |
| ND | 285 | 0 |
| NE | 807 | 0 |
| NH | 412 | 0 |
| NJ | 3052 | 0 |
| NM | 399 | 0 |
| NV | 729 | 0 |
| NY | 5747 | 0 |
| OH | 5487 | 0 |
| OK | 1421 | 0 |
| OR | 1186 | 0 |
| PA | 4307 | 0 |
| RI | 306 | 0 |
| SC | 1228 | 0 |
| SD | 376 | 0 |
| TN | 2488 | 0 |
| TX | 10192 | 0 |
| UT | 1125 | 0 |
| VA | 3220 | 0 |
| VT | 135 | 0 |
| WA | 1956 | 0 |
| WI | 2367 | 0 |
| WV | 813 | 0 |
| WY | 142 | 0 |
Question 2
# Keep only the records for female babies named "Allison".
allisonname_F <- filter(names, Name == "Allison" & Sex == "F")
Question 3
# Collapse the female-Allison records to one total per year.
allisonname_f_byYear <- summarize(
  group_by(allisonname_F, Year),
  Count = sum(Count)
)

# Bar chart of the yearly totals.
ggplot(allisonname_f_byYear, aes(x = Year, y = Count)) +
  geom_col() +
  labs(title = 'Popularity of the name "Allison" over time')
Part 2: Modeling the Number of Allisons
Question 4
# Simple linear regression of the yearly Allison count on year.
Model1 <- lm(Count ~ Year, data = allisonname_f_byYear)
Question 5
# Scatterplot of the yearly counts with a fitted least-squares line.
# The plot is built from the data frame the model was fitted to rather than
# from the `lm` object, so it does not depend on ggplot2 converting a model
# object into plotting data.
allisonname_f_byYear |>
  ggplot(mapping = aes(x = Year, y = Count)) +
  geom_point() +
  stat_smooth(method = "lm")
`geom_smooth()` using formula 'y ~ x'

Question 6
# Print the already-fitted model instead of refitting an identical one.
Model1
Call:
lm(formula = Count ~ Year, data = allisonname_f_byYear)
Coefficients:
(Intercept) Year
209689.8 -101.5
# Estimated regression equation: Count = 209689.8 - 101.5 * Year
Question 7
# Residuals-versus-fitted plot for Model1, drawn from the `.fitted` and
# `.resid` columns that broom::augment() returns.
ggplot(broom::augment(Model1), aes(x = .fitted, y = .resid)) +
  geom_point()
In the plot of the residuals against the fitted values, we do not see any discernible pattern.
Question 8
Our model shows that the name Allison is declining in popularity. Even so, Allison is still quite popular: about 5000 newborn babies were given that name in the most recent year of our data.
Part 3: Spelling by State
Question 1
# Records for male babies with any of the three spellings. No grouping is
# attached to the stored data because the plot below does not use one.
alan_name_M <- names |>
  filter(Sex == "M", Name %in% c("Allan", "Alan", "Allen"))

# geom_col() stacks every record that shares a year, so each bar is the
# combined count of all matching records for that year.
ggplot(data = alan_name_M, mapping = aes(x = Year, y = Count)) +
  geom_col() +
  labs(title = 'Popularity of the name "Allen, Allan, Alan" over time')
Question 2
# Counts of each spelling for male babies in California and Pennsylvania in
# 2000, reshaped so that every spelling becomes its own column.
alannametotal <- alan_name_M |>
  filter(State %in% c("PA", "CA") & Year == 2000) |>
  pivot_wider(names_from = Name, values_from = Count) |>
  select(Year, Sex, State, Alan, Allen, Allan)

# HTML table of the six selected columns with readable headers.
knitr::kable(
  alannametotal[, 1:6],
  format = "html",
  col.names = c(
    "Year", "Sex", "State",
    "Count of Alan",
    "Count of Allen",
    "Count of Allan"
  )
)
| Year | Sex | State | Count of Alan | Count of Allen | Count of Allan |
|---|---|---|---|---|---|
| 2000 | M | CA | 579 | 176 | 131 |
| 2000 | M | PA | 51 | 56 | 12 |
Question 3
# Share of each spelling within each state (CA and PA) for male babies in
# 2000: every count is divided by the total of the rows for its state.
alannamepercent <- alan_name_M |>
  filter(Year == 2000, State %in% c("PA", "CA")) |>
  group_by(State) |>
  mutate(Count = Count / sum(Count)) |>
  # Grouping was only needed for the per-state denominator.
  ungroup() |>
  pivot_wider(names_from = Name, values_from = Count) |>
  select(Year, Sex, State, Alan, Allen, Allan)

# The values are proportions, so the headers say so. The table is rendered as
# HTML, so striping is requested through `bootstrap_options`; `latex_options`
# applies to LaTeX output only.
knitr::kable(
  alannamepercent,
  format = "html",
  col.names = c(
    "Year", "Sex", "State",
    "Proportion of Alan",
    "Proportion of Allen",
    "Proportion of Allan"
  )
) |>
  kableExtra::kable_styling(bootstrap_options = "striped", font_size = 13) |>
  kableExtra::row_spec(1:2, color = "white", background = "black")
| Year | Sex | State | Count of Alan | Count of Allen | Count of Allan |
|---|---|---|---|---|---|
| 2000 | M | CA | 0.6534989 | 0.1986456 | 0.1478555 |
| 2000 | M | PA | 0.4285714 | 0.4705882 | 0.1008403 |